This analysis evaluates chromosomal instability by using breakpoint SV and CNV data that was formatted by 00-setup-breakpoint_data.R and mapping chromosomal break density. This notebook returns chromosomal break heatmaps and plots for each tumor type group in the plots directory.
This notebook can be run via the command line from the top directory of the repository as follows:
Rscript -e "rmarkdown::render('analyses/chromosomal-instability/plot-chromosomal-instability.Rmd',
clean = TRUE)"
rr # Path to data directory data_dir <- file.path(.., .., ) scratch_dir <- file.path(.., .., )
plots_dir <-
hist_plots_dir <- file.path(plots_dir, -type)
if (!dir.exists(hist_plots_dir)) { dir.create(hist_plots_dir, recursive = TRUE) }
Read in the custom functions needed for this analysis.
rr # Read in the metadata metadata <- readr::read_tsv(file.path(data_dir, -histologies.tsv))
Parsed with column specification:
cols(
.default = col_character(),
OS_days = [32mcol_double()[39m,
age_last_update_days = [32mcol_double()[39m,
normal_fraction = [32mcol_double()[39m,
tumor_fraction = [32mcol_double()[39m,
tumor_ploidy = [32mcol_double()[39m,
molecular_subtype = [33mcol_logical()[39m
)
See spec(...) for full column specifications.
221 parsing failures.
row col expected actual file
2606 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
2607 molecular_subtype 1/0/T/F/TRUE/FALSE Group4 '../../data/pbta-histologies.tsv'
2608 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
2609 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
2610 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
.... ................. .................. ...... .................................
See problems(...) for more details.
rr breaks_list <- readr::read_rds(file.path(-data, _lists.RDS))
Set up metadata
rr common_samples <- unique(breaks_list\(common_breaks\)samples)
Load in the previously formatted breakpoint data.
breaks_list <- readr::read_rds(file.path("breakpoint-data", "breaks_lists.rds"))
Get a vector of the biospecimen IDs.
rr # Change here and it will change the rest bin_size <- 1e6
sample_densities <- lapply(common_samples, function(sample_id) { lapply(breaks_list, function(breaks_df) { break_density(breaks_df, sample_id = sample_id, start_col = , end_col = , window_size = bin_size, chr_sizes_vector = chr_sizes_vector ) }) })
Set up chromosome size data. It just so happens that this BED file: WGS.hg38.strelka2.unpadded.bed is actually just a list of the chromosome sizes so we are using that for now.
rr # Save to an RDS file readr::write_rds( sample_densities, file.path(scratch_dir, _breakpoint_densities.RDS) )
For each sample, we will bin the genome and count how many chromosome breaks occur for each bin, given the given bin_size.
We will run this for each sample and return a list of three GenomicRanges objects: 1) common_breaks contains the combined break counts for both SV and CNV break data.
2) cnv_breaks contains the number of break counts for CNV.
3) sv_breaks contains the number of break counts for SV.
rr # Extract chromosome names from a GenomicRanges object chr_labels <- sample_densities[[1]]$common_breaks@seqnames chr_labels <- paste0(, as.factor(chr_labels))
chr_colors <- rep(c(, ), length.out = length(unique(chr_labels))) names(chr_colors) <- unique(chr_labels)
chr_annot <- ComplexHeatmap::HeatmapAnnotation( df = data.frame(chr_labels), col = list(chr_labels = c(chr_colors)), name = \, show_legend = FALSE, show_annotation_name = FALSE )
Save to an RDS file
rr # Get the histologies for the samples in this set and order them by histology histologies <- data.frame(Kids_First_Biospecimen_ID = common_samples) %>% dplyr::inner_join(dplyr::select(metadata, Kids_First_Biospecimen_ID, short_histology)) %>% dplyr::mutate(short_histology = as.factor(short_histology)) %>% dplyr::arrange(short_histology) %>% tibble::column_to_rownames(_First_Biospecimen_ID)
Joining, by = \Kids_First_Biospecimen_ID\
Column `Kids_First_Biospecimen_ID` joining factor and character vector, coercing into character vector
rr # Create the Heatmap annotation object hist_annot <- ComplexHeatmap::rowAnnotation( df = histologies, show_annotation_name = FALSE )
Given the GenomicRanges objects for each sample, create a combined plot for each.
Make chromosome labeling HeatmapAnnotation object.
rr col_fun <- circlize::colorRamp2( c(0, 2, 5, 10, 20), c(#edf8fb, #b2e2e2, #66c2a4, #2ca25f, #006d2c) )
Make histology labeling HeatmapAnnotation object.
rr breaks_heatmap <- function(data_name) { # A wrapper function for making a heatmap from the samples GenomicRanges list. # # Args: # data_name: a character string that matches a name in the list.
# Returns: # A heatmap of the chromosomal breaks
# Pull out the total_counts total_counts <- lapply(sample_densities, function(granges_list) { granges_list[[data_name]]@elementMetadata@listData$total_counts }) # Make into a data.frame total_counts <- dplyr::bind_rows(total_counts) %>% dplyr::select(rownames(histologies)) %>% t() # Add chromosome labels colnames(total_counts) <- chr_labels # Plot on a heatmap heatmap <- ComplexHeatmap::Heatmap(total_counts, col = col_fun, heatmap_legend_param = list(title = of chr breaks), cluster_columns = FALSE, cluster_rows = FALSE, show_column_names = FALSE, show_row_names = FALSE, bottom_annotation = chr_annot, left_annotation = hist_annot ) # Return plot return(heatmap) }
Make a color function.
rr common_heatmap <- breaks_heatmap(data_name = _breaks)
png(file.path(plots_dir, paste0(_breaks_heatmap.png)), width = 1200, height = 900, units =
) common_heatmap dev.off()
null device
1
rr # Print out here common_heatmap
Make a function for making the heatmaps.
rr cnv_heatmap <- breaks_heatmap(data_name = _breaks)
png(file.path(plots_dir, paste0(_breaks_heatmap.png)), width = 1200, height = 900, units =
) cnv_heatmap dev.off()
null device
1
rr # Print out here cnv_heatmap
rr sv_heatmap <- breaks_heatmap(data_name = _breaks)
png(file.path(plots_dir, paste0(_breaks_heatmap.png)), width = 1200, height = 900, units =
) sv_heatmap dev.off()
null device
1
rr # Print out here sv_heatmap
rr # Change bin_size here and it will change the rest bin_size <- 1e6
tumor_types <- metadata %>% dplyr::filter(!is.na(short_histology), experimental_strategy != -Seq) %>% dplyr::distinct(short_histology) %>% dplyr::pull(short_histology)
rr # Get a big list of break densities for each sample. group_densities <- lapply(tumor_types, function(tumor_type) {
# Obtain a list of sample_ids to subset by sample_ids <- metadata %>% dplyr::filter(metadata$short_histology == tumor_type) %>% dplyr::pull(Kids_First_Biospecimen_ID)
# Double check these samples are in the list check_samples <- (sample_ids %in% common_samples)
# If no samples, go to next if (sum(check_samples) > 1) { message(paste(breakpoint density for, tumor_type, )) lapply(breaks_list, function(breaks_df) { break_density(breaks_df, sample_id = sample_ids, start_col = , end_col = , window_size = bin_size, chr_sizes_vector = chr_sizes_vector ) }) } })
Calculating breakpoint density for Ependymoma samples
Calculating breakpoint density for HGAT samples
Calculating breakpoint density for LGAT samples
Calculating breakpoint density for Other samples
Calculating breakpoint density for CNS sarcoma samples
Calculating breakpoint density for Schwannoma samples
Calculating breakpoint density for ATRT samples
Calculating breakpoint density for Choroid plexus tumor samples
Calculating breakpoint density for Craniopharyngioma samples
Calculating breakpoint density for DNET samples
Calculating breakpoint density for Teratoma samples
Calculating breakpoint density for Hemangioblastoma samples
Calculating breakpoint density for Meningioma samples
Calculating breakpoint density for Adenoma samples
Calculating breakpoint density for Neurofibroma samples
Calculating breakpoint density for Ganglioglioma samples
Calculating breakpoint density for Langerhans Cell histiocytosis samples
Calculating breakpoint density for CNS Embryonal tumor samples
Calculating breakpoint density for CNS neuroblastoma samples
Calculating breakpoint density for Chordoma samples
Calculating breakpoint density for MPNST samples
Calculating breakpoint density for Glial-neuronal tumor NOS samples
Calculating breakpoint density for Pineoblastoma samples
Calculating breakpoint density for CNS EFT-CIC samples
Calculating breakpoint density for Central neurocytoma samples
Calculating breakpoint density for Germinoma samples
Calculating breakpoint density for CNS Rhabdomyosarcoma samples
Calculating breakpoint density for Dysplasia samples
Calculating breakpoint density for Oligodendroglioma samples
Calculating breakpoint density for Hemangioma samples
Calculating breakpoint density for LGMT samples
Calculating breakpoint density for CNS ganglioneuroblastoma samples
Calculating breakpoint density for Medulloblastoma samples
rr # Bring along the tumor-type labels names(group_densities) <- tumor_types
group_densities <- group_densities[!sapply(group_densities, is.null)]
Same as was done for each sample, now we will calculate densities for each tumor-type group.
rr # Save to an RDS file readr::write_rds( group_densities, file.path(scratch_dir, _breakpoint_densities.RDS) )
Run the density calculations for the groups.
rr purrr::imap(group_densities, function(.x, name = .y) { # Make the combo plot multipanel_break_plot( granges_list = .x, plot_name = name, y_val = _counts, y_lab = Breaks per Mb, plot_dir = hist_plots_dir ) })
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
$Ependymoma
$HGAT
$LGAT
$Other
$`CNS sarcoma`
$Schwannoma
$ATRT
$`Choroid plexus tumor`
$Craniopharyngioma
$DNET
$Teratoma
$Hemangioblastoma
$Meningioma
$Adenoma
$Neurofibroma
$Ganglioglioma
$`Langerhans Cell histiocytosis`
$`CNS Embryonal tumor`
$`CNS neuroblastoma`
$Chordoma
$MPNST
$`Glial-neuronal tumor NOS`
$Pineoblastoma
$`CNS EFT-CIC`
$`Central neurocytoma`
$Germinoma
$`CNS Rhabdomyosarcoma`
$Dysplasia
$Oligodendroglioma
$Hemangioma
$LGMT
$`CNS ganglioneuroblastoma`
$Medulloblastoma
Save to an RDS file
rr # Declare name of zip file zip_file <- paste0(hist_plots_dir, .zip)
if (file.exists(zip_file)) { file.remove(zip_file) }
[1] TRUE
rr # Zip up the plots zip(zip_file, hist_plots_dir)
adding: plots/tumor-type/ (stored 0%)
adding: plots/tumor-type/Ependymoma_breaks.png (deflated 16%)
adding: plots/tumor-type/CNS Embryonal tumor_breaks.png (deflated 15%)
adding: plots/tumor-type/Glial-neuronal tumor NOS_breaks.png (deflated 14%)
adding: plots/tumor-type/ATRT_breaks.png (deflated 15%)
adding: plots/tumor-type/Medulloblastoma_breaks.png (deflated 15%)
adding: plots/tumor-type/Oligodendroglioma_breaks.png (deflated 12%)
adding: plots/tumor-type/LGMT_breaks.png (deflated 11%)
adding: plots/tumor-type/.DS_Store (deflated 97%)
adding: plots/tumor-type/HGAT_breaks.png (deflated 15%)
adding: plots/tumor-type/Hemangioma_breaks.png (deflated 12%)
adding: plots/tumor-type/LGAT_breaks.png (deflated 16%)
adding: plots/tumor-type/Pineoblastoma_breaks.png (deflated 13%)
adding: plots/tumor-type/Dysplasia_breaks.png (deflated 16%)
adding: plots/tumor-type/MPNST_breaks.png (deflated 13%)
adding: plots/tumor-type/Central neurocytoma_breaks.png (deflated 10%)
adding: plots/tumor-type/CNS neuroblastoma_breaks.png (deflated 13%)
adding: plots/tumor-type/Neurofibroma_breaks.png (deflated 15%)
adding: plots/tumor-type/Chordoma_breaks.png (deflated 16%)
adding: plots/tumor-type/Schwannoma_breaks.png (deflated 16%)
adding: plots/tumor-type/Hemangioblastoma_breaks.png (deflated 14%)
adding: plots/tumor-type/DNET_breaks.png (deflated 16%)
adding: plots/tumor-type/Langerhans Cell histiocytosis_breaks.png (deflated 13%)
adding: plots/tumor-type/CNS Rhabdomyosarcoma_breaks.png (deflated 11%)
adding: plots/tumor-type/Meningioma_breaks.png (deflated 15%)
adding: plots/tumor-type/Ganglioglioma_breaks.png (deflated 16%)
adding: plots/tumor-type/CNS sarcoma_breaks.png (deflated 11%)
adding: plots/tumor-type/Germinoma_breaks.png (deflated 15%)
adding: plots/tumor-type/Choroid plexus tumor_breaks.png (deflated 14%)
adding: plots/tumor-type/Craniopharyngioma_breaks.png (deflated 16%)
adding: plots/tumor-type/Teratoma_breaks.png (deflated 15%)
adding: plots/tumor-type/CNS EFT-CIC_breaks.png (deflated 16%)
adding: plots/tumor-type/Other_breaks.png (deflated 16%)
adding: plots/tumor-type/Adenoma_breaks.png (deflated 12%)
adding: plots/tumor-type/CNS ganglioneuroblastoma_breaks.png (deflated 14%)
Here we will plot median number of break points for the tumor-type group per each bin.
rr sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 9 (stretch)
Matrix products: default
BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
[4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] optparse_1.6.2 GenomicRanges_1.38.0 GenomeInfoDb_1.22.0 IRanges_2.20.1 S4Vectors_0.24.1
[6] BiocGenerics_0.32.0
loaded via a namespace (and not attached):
[1] colorspace_1.4-1 rjson_0.2.20 rprojroot_1.3-2
[4] biovizBase_1.34.1 circlize_0.4.8 htmlTable_1.13.1
[7] XVector_0.26.0 GlobalOptions_0.1.1 base64enc_0.1-3
[10] dichromat_2.0-0 clue_0.3-57 rstudioapi_0.10
[13] getopt_1.20.3 bit64_0.9-7 AnnotationDbi_1.48.0
[16] fansi_0.4.0 splines_3.6.0 R.methodsS3_1.7.1
[19] ggbio_1.34.0 knitr_1.23 zeallot_0.1.0
[22] Formula_1.2-3 jsonlite_1.6 Rsamtools_2.2.1
[25] dbplyr_1.4.2 cluster_2.1.0 png_0.1-7
[28] R.oo_1.22.0 graph_1.62.0 BiocManager_1.30.4
[31] httr_1.4.0 readr_1.3.1 compiler_3.6.0
[34] backports_1.1.4 assertthat_0.2.1 Matrix_1.2-17
[37] lazyeval_0.2.2 cli_1.1.0 acepack_1.4.1
[40] htmltools_0.3.6 prettyunits_1.0.2 tools_3.6.0
[43] gtable_0.3.0 glue_1.3.1 GenomeInfoDbData_1.2.2
[46] reshape2_1.4.3 dplyr_0.8.3 rappdirs_0.3.1
[49] Rcpp_1.0.1 Biobase_2.46.0 styler_1.1.1
[52] vctrs_0.1.0 Biostrings_2.54.0 rtracklayer_1.46.0
[55] xfun_0.8 stringr_1.4.0 ensembldb_2.10.2
[58] XML_3.98-1.20 zlibbioc_1.32.0 scales_1.0.0
[61] BSgenome_1.54.0 VariantAnnotation_1.32.0 ProtGenerics_1.18.0
[64] hms_0.4.2 RBGL_1.62.1 SummarizedExperiment_1.16.0
[67] AnnotationFilter_1.10.0 rematch2_2.0.1 RColorBrewer_1.1-2
[70] curl_3.3 ComplexHeatmap_2.2.0 yaml_2.2.0
[73] memoise_1.1.0 gridExtra_2.3 ggplot2_3.2.0
[76] biomaRt_2.42.0 rpart_4.1-15 reshape_0.8.8
[79] latticeExtra_0.6-28 stringi_1.4.3 RSQLite_2.1.1
[82] checkmate_1.9.4 GenomicFeatures_1.38.0 BiocParallel_1.20.0
[85] shape_1.4.4 rlang_0.4.0 pkgconfig_2.0.2
[88] bitops_1.0-6 matrixStats_0.55.0 evaluate_0.14
[91] lattice_0.20-38 purrr_0.3.2 labeling_0.3
[94] GenomicAlignments_1.22.1 htmlwidgets_1.3 cowplot_0.9.4
[97] bit_1.1-14 tidyselect_0.2.5 GGally_1.4.0
[100] plyr_1.8.4 magrittr_1.5 R6_2.4.0
[103] Hmisc_4.2-0 DelayedArray_0.12.0 DBI_1.0.0
[106] pillar_1.4.2 foreign_0.8-71 withr_2.1.2
[109] survival_2.44-1.1 RCurl_1.95-4.12 nnet_7.3-12
[112] tibble_2.1.3 crayon_1.3.4 utf8_1.1.4
[115] OrganismDbi_1.28.0 BiocFileCache_1.10.2 rmarkdown_1.13
[118] GetoptLong_0.1.7 progress_1.2.2 grid_3.6.0
[121] data.table_1.12.2 blob_1.1.1 digest_0.6.20
[124] tidyr_0.8.3 R.utils_2.9.0 openssl_1.4
[127] munsell_0.5.0 askpass_1.1
Zip up the PNG files into one file.
# Declare name of zip file
zip_file <- paste0(hist_plots_dir, ".zip")
# Remove any current zip_file of this name so we can overwrite it
if (file.exists(zip_file)) {
file.remove(zip_file)
}
# Zip up the plots
zip(zip_file, hist_plots_dir)
sessionInfo()